import pandas as pd

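# Generate the Neo4j relationship files:
#
# director -> film relationship (directed)
#
# actor -> film relationship (acted in)
#
# director -> actor relationship (collaborated)
#
# film -> type relationship (belongs to)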

# Load the raw table and the previously generated node files.
def _read_csv_skipping_bad_lines(path):
    """Read *path* with pandas, skipping (and warning about) malformed rows.

    ``error_bad_lines=False`` was deprecated in pandas 1.3 and removed in
    pandas 2.0, where passing it raises ``TypeError``.  Its replacement is
    ``on_bad_lines``; ``'warn'`` reproduces the old behaviour of dropping the
    bad row while still reporting it.
    """
    try:
        return pd.read_csv(path, on_bad_lines='warn')
    except TypeError:
        # pandas < 1.3 does not know ``on_bad_lines``; use the legacy flag.
        return pd.read_csv(path, error_bad_lines=False)


df = _read_csv_skipping_bad_lines('Douban.csv')
df_film = _read_csv_skipping_bad_lines('out/film.csv')
df_directors = _read_csv_skipping_bad_lines('out/director.csv')
df_actors = _read_csv_skipping_bad_lines('out/actor.csv')
df_types = _read_csv_skipping_bad_lines('out/type.csv')
print(df_directors)

# Walk the raw table and assemble the four relationship tables.  Every row of
# a relationship table is [start id, end id, relation, Neo4j relationship type].
director_films, actor_films, director_actors, film_types = [], [], [], []


def _first_id_by_name(nodes, name_column):
    """Map each value of ``nodes[name_column]`` to its ``index:ID``.

    When a name occurs more than once, the ID of its first row wins, which is
    the row a ``.values[0]`` lookup on the filtered frame would select.
    Building the mapping once replaces a full scan of the node frame for every
    single name lookup.
    """
    ids = {}
    for name, node_id in zip(nodes[name_column], nodes['index:ID']):
        ids.setdefault(name, node_id)
    return ids


def _split_names(cell):
    """Split a "/"-separated cell into names.

    An empty CSV cell is read by pandas as a float NaN, which has no
    ``split``; such a cell contributes no names instead of aborting the run.
    """
    return cell.split("/") if isinstance(cell, str) else []


film_ids = _first_id_by_name(df_film, 'film')
director_ids = _first_id_by_name(df_directors, 'director')
actor_ids = _first_id_by_name(df_actors, 'actor')
type_ids = _first_id_by_name(df_types, 'type')

# Columns used from the raw table: filmname, directors, actors, type.
for film_name, directors, actors, types in zip(
        df['filmname'], df['directors'], df['actors'], df['type']):
    # A name absent from its node file raises KeyError carrying that name.
    film_id = film_ids[film_name]
    print(film_id)

    # Resolve each name once; the IDs are reused by several relationships.
    film_director_ids = [director_ids[name] for name in _split_names(directors)]
    film_actor_ids = [actor_ids[name] for name in _split_names(actors)]
    film_type_ids = [type_ids[name] for name in _split_names(types)]

    # director -> film
    director_films.extend(
        [director_id, film_id, '导演', '导演'] for director_id in film_director_ids)

    # actor -> film
    actor_films.extend(
        [actor_id, film_id, '演出', '演出'] for actor_id in film_actor_ids)

    # director -> actor: every director of the film paired with every actor
    director_actors.extend(
        [director_id, actor_id, '合作', '合作']
        for director_id in film_director_ids
        for actor_id in film_actor_ids)

    # film -> type: labelled "属于" (belongs to), the relation this file's
    # header comment names for film/type; it was previously emitted as "演出".
    film_types.extend(
        [film_id, type_id, '属于', '属于'] for type_id in film_type_ids)


# Column header shared by every relationship file.
_RELATION_COLUMNS = [':START_ID', ':END_ID', 'relation', ':TYPE']


def _write_relations(rows, target):
    """Write *rows* to the CSV file *target* and return the frame written."""
    frame = pd.DataFrame(data=rows, columns=_RELATION_COLUMNS)
    frame.to_csv(target, index=False, encoding='utf-8')
    return frame


# director -> film
df_director_film = _write_relations(
    director_films, 'out/relationship_director_film.csv')
print('导演-电影关系导出到CSV成功')

# actor -> film
df_actor_film = _write_relations(
    actor_films, 'out/relationship_actor_film.csv')
print('演员-电影关系导出到CSV成功')

# director -> actor
df_director_actor = _write_relations(
    director_actors, 'out/relationship_director_actor.csv')
print('导演-演员关系导出到CSV成功')

# film -> type
df_film_type = _write_relations(
    film_types, 'out/relationship_film_type.csv')
print('电影-类型关系导出到CSV成功')


